
/* RETRIEVING REALTYTRAC DATA TO IDENTIFY PAIDOFF LOANS IN ABSNET THAT WERE NOT REFINANCING */

* !! RUN DATA STEPS ONE BY ONE WHEN USING PSEUDO DATASETS !!;

* I. IMPORTING ABSNET LOAN FILE;

PROC IMPORT OUT= WORK.LOANS 
            DATAFILE= "D:\Box\Box\Moussa\Research\Underwater Mortgages\stata_sas_codes\JFQA Codes and Data\output_data\final_sample_loans.dta" 
            DBMS=STATA REPLACE;
RUN;

libname dir "D:\Box\Box\Moussa\Research\Underwater Mortgages\stata_sas_codes\JFQA Codes and Data\output_data";

data dir.final_sample_loans; set loans;
originator=lowcase(originator);
propertycity=lowcase(propertycity);
loanshort=int(originalloanbalance/1000);
appraisalshort=int(originalappraisedvalue/1000);
run;


* II. REALTY TRACT DATA:

* 1. RECORDER DATA;

libname rec "D:\Box\Box\Moussa\Research\Underwater Mortgages\stata_sas_codes\JFQA Codes and Data\input_data"; /* This indexes the recorder input data folder */

libname recd "D:\Box\Box\Moussa\Research\Underwater Mortgages\stata_sas_codes\JFQA Codes and Data\output_data"; /* This indexes the recorder output data folder */

%macro recdata(var);

data recd.&var;
set rec.&var(keep= SR_UNIQUE_ID SR_PROPERTY_ID MM_STATE_CODE MM_FIPS_STATE_CODE MM_FIPS_MUNI_CODE MM_FIPS_COUNTY_NAME SR_MAIL_CITY SR_MAIL_STATE SR_MAIL_ZIP SR_SELLER SR_BUYER 
SR_VAL_TRANSFER SR_DATE_TRANSFER SR_DATE_FILING SR_DOC_TYPE SR_DEED_TYPE SR_TRAN_TYPE SR_QUITCLAIM SR_LOAN_VAL_1 SR_LNDR_LAST_NAME_1 SR_LNDR_FIRST_NAME_1);
if int(SR_DATE_TRANSFER/10000) ge 2000 and int(SR_DATE_TRANSFER/10000) le 2016;

if SR_TRAN_TYPE="R" or SR_TRAN_TYPE="L"; /* Resales and refi only*/
if SR_QUITCLAIM=0;
if SR_LOAN_VAL_1 ge 50000 and SR_LOAN_VAL_1 le 5000000;
SR_BUYER=upcase(SR_BUYER);
SR_SELLER=upcase(SR_SELLER);
SR_LNDR_LAST_NAME_1=upcase(SR_LNDR_LAST_NAME_1);
SR_LNDR_FIRST_NAME_1=upcase(SR_LNDR_FIRST_NAME_1);
run;

%mend;

%recdata(recorder01)
/* loop over all cuts of the recorder data 
%recdata(recorder02)
%recdata(recorder03)
%recdata(recorder04)
%recdata(recorder05) */

data recd.recorderdata;
set recd.recorder01; /* Append all data sets created from the preceding step */
run;

proc sort data=recd.recorderdata out=property_ids(keep= sr_property_id SR_UNIQUE_ID); by SR_PROPERTY_ID; run;

data recd.property_ids; set property_ids; 
by sr_property_id;
if first.sr_property_id;
run;


* 2. ASSESSOR DATA;

libname ass "D:\Box\Box\Moussa\Research\Underwater Mortgages\stata_sas_codes\JFQA Codes and Data\input_data";  /* This indexes the assessor input data folder */

libname assd "D:\Box\Box\Moussa\Research\Underwater Mortgages\stata_sas_codes\JFQA Codes and Data\output_data";  /* This indexes the assessor output data folder */

%macro assdata(var);

proc sort data=ass.&var out=temp(keep= SA_PROPERTY_ID SA_OWNER_1 SA_OWNER_1_FIRST SA_OWNER_1_FIRST SA_OWNER_1_TYPE SA_COMPANY_FLAG 
ASSR_YEAR SA_NBR_UNITS SA_VAL_MARKET SA_APPRAISE_VAL SA_APPRAISE_YR SA_SITE_CITY SA_SITE_STATE SA_SITE_ZIP 
rename= (SA_PROPERTY_ID=SR_PROPERTY_ID)); by SA_PROPERTY_ID; run;

data assd.&var(drop= SR_UNIQUE_ID); 
merge recd.property_ids temp;
by SR_PROPERTY_ID;
if SR_UNIQUE_ID ne .;
if SA_SITE_STATE ne "";
run;

%mend;

%assdata(assessor01) 
/* loop over all cuts of the original assessor data
%assdata(assessor02)
%assdata(assessor03)
%assdata(assessor04)
%assdata(assessor05) */

data assd.assessordata;
set assd.assessor01; /* append all data sets created from the previous loop */
run;

proc sort data=assd.assessordata; by SR_PROPERTY_ID; run;

data check; set assd.assessordata;
by SR_PROPERTY_ID;
if not first.SR_PROPERTY_ID;
run;


* III. MERGING ABSNET AND REALTY;

* 1. MERGING ASSESSOR TO RECORDER DATA TO CAPTURE NBR OF UNITS;

proc sort data=recd.recorderdata; by sr_property_id; run;

data recorder_assessor_data(drop= _date rename= (sa_nbr_units=units )); 
merge recd.recorderdata assd.assessordata(keep= SR_PROPERTY_ID SA_SITE_CITY SA_SITE_STATE SA_SITE_ZIP SA_NBR_UNITS);
by SR_PROPERTY_ID;
if SA_NBR_UNITS ne .;
if SR_UNIQUE_ID ne .;
if SA_NBR_UNITS ge 1 and SA_NBR_UNITS le 4;
zipcode=input(sa_site_zip,5.);
loanamount=int(sr_loan_val_1/1000);
_date=put(sr_date_transfer,8.);
year=input(substr(_date,1,4),4.);
month=input(substr(_date,5,2),2.);
day=input(substr(_date,7,2),2.);
date=mdy(month,day,year);
purchase=1;
if sr_tran_type="L" then purchase=0;
run;

* 2. MERGING ABSNET AND REALTY USING FIRST LIENS, ZIPCODE, LOAN AMOUNT (000s), AND NUMBER OF UNITS;

data absnet;
set dir.final_sample_loans(rename= (propertyzip=zipcode numberofunits=units loanoriginationdate=date));
loanamount=int(originalloanbalance/1000);
purchase=0;
if  loanpurposefk="P" then purchase=1;
if  loanpurposefk ne "";
originator=upcase(originator);
run;

proc sort data=absnet; by absnetloanfk; run;

data absnet_loans(keep= absnetloanfk); set absnet; by absnetloanfk;
if first.absnetloanfk;
run;

* 2.a. Round 1;

proc sort data=absnet; by zipcode purchase units date loanamount; run;

proc sort data=recorder_assessor_data; by zipcode purchase units date loanamount; run;

data matched1; merge absnet(in=i) recorder_assessor_data(in=j); 
by zipcode purchase units date loanamount;
if i=1 and j=1;
run;

proc sort data=matched1; by absnetloanfk; run;

data matched1; set matched1;
by absnetloanfk;
id+1;
if first.absnetloanfk then id=1;
run;

proc means data=matched1 noprint; by absnetloanfk;
var id;
output out=temp1(drop= _type_ _freq_) max= idmax;
run;

data matched1a(where= (idmax=1)) temp2(where= (idmax gt 1)); merge matched1 temp1; by absnetloanfk; run;

data matched1b; set temp2; by absnetloanfk;
if id=2 and SR_BUYER=lag(SR_BUYER) and SR_LNDR_LAST_NAME_1=lag(SR_LNDR_LAST_NAME_1);
run;

data matched1; set matched1a matched1b; run;

* Removing duplicate properties;

proc sort data=matched1; by sr_property_id; run;

proc means data=matched1 noprint;
by sr_property_id;
var sr_property_id;
output out=prop_matched1(drop= _type_ _freq_) n= obs ;
run;


libname match "D:\Box\Box\Moussa\Research\Underwater Mortgages\stata_sas_codes\JFQA Codes and Data\output_data"; /* This is indexing to the same output folder */

data match.matched1(drop= obs); merge matched1 prop_matched1; by sr_property_id;
if obs=1;
run;

proc sort data=match.matched1; by absnetloanfk; run;

data check; set match.matched1; by absnetloanfk;
if first.absnetloanfk;
run;

proc sort data=match.matched1; by sr_property_id; run;

data check; set match.matched1; by sr_property_id;
if first.sr_property_id;
run;

* Not matched loans;

proc sort data=absnet; by absnetloanfk; run; 

proc sort data=match.matched1; by absnetloanfk; run; 

data notmatched_loans1(keep= ABSNetLoanFK date loantypefk loanpurposefk occupancystatusfk originalloanbalance propertystatefk zipcode lienposition originator units originalappraisedvalue cbsa purchasepriceofproperty propertycity loanshort appraisalshort
loanamount purchase); 
merge absnet(in=i) match.matched1(in=j); by absnetloanfk;
if i=1 and j=0;
run;

data check; set notmatched_loans1; by absnetloanfk;
if first.absnetloanfk;
run;

* Not matched properties;

proc sort data=recorder_assessor_data; by sr_property_id; run;

proc sort data=match.matched1; by sr_property_id; run; 

data notmatched_props1; 
merge recorder_assessor_data(in=i) match.matched1(in=j keep= sr_property_id); by sr_property_id;
if i=1 and j=0;
run;

* 2.b. Round 2;

proc sort data=notmatched_loans1; by zipcode purchase units loanamount; run;

proc sort data=notmatched_props1; by zipcode purchase units loanamount; run;

data matched2; merge notmatched_loans1(rename= (date=loanoriginationdate)) notmatched_props1; 
by zipcode purchase units loanamount;
if absnetloanfk ne .;
if sr_property_id ne .;
regist_lag=date-loanoriginationdate;
if regist_lag ge 0 and regist_lag le 60;
run;

* Excluding loans and properties matched in round 1;

proc sort data=match.matched1; by sr_property_id; run; 

proc sort data=matched2; by sr_property_id; run; 

data matched2; merge matched2(in=i) match.matched1(in=j keep= sr_property_id);
by sr_property_id;
if i=1 and j=0;
run;

proc sort data=match.matched1; by absnetloanfk; run; 

proc sort data=matched2; by absnetloanfk; run; 

data matched2; merge matched2(in=i) match.matched1(in=j keep= absnetloanfk);
by absnetloanfk;
if i=1 and j=0;
run;

* Selecting matches;

proc sort data=matched2; by absnetloanfk regist_lag; run;

data temp1; set matched2;
by absnetloanfk;
id+1;
if first.absnetloanfk then id=1;
run;

proc means data=temp1 noprint; by absnetloanfk;
var id;
output out=ids(drop= _type_ _freq_) max= idmax;
run;

* First match;

data matched2a; merge temp1 ids; by absnetloanfk; 
if idmax=1;
run;

data check; set matched2a; by absnetloanfk; 
if first.absnetloanfk;
run;

proc sort data=matched2a; by sr_property_id; run;

data check; set matched2a; by sr_property_id;
if first.sr_property_id;
run;

proc means data=matched2a noprint; by sr_property_id;
var id;
output out=ids(drop= _type_ _freq_) n= nobs;
run;

data matched2a(drop= nobs); merge matched2a ids; by sr_property_id;
if nobs=1;
run;

data check; set matched2a; by sr_property_id;
if first.sr_property_id;
run;

* Second match;

proc sort data=notmatched_loans1; by absnetloanfk; run; 

proc sort data=matched2a; by absnetloanfk; run; 

data notmatched_loans2; merge notmatched_loans1(in=i) matched2a(in=j keep= absnetloanfk); by absnetloanfk;
if i=1 and j=0;
run;

proc sort data=notmatched_props1; by sr_property_id; run;

proc sort data=matched2a; by sr_property_id; run; 

data notmatched_props2; merge notmatched_props1(in=i) matched2a(in=j keep= sr_property_id); by sr_property_id;
if i=1 and j=0;
run;


proc sort data=notmatched_loans2; by zipcode purchase units loanamount; run;

proc sort data=notmatched_props2; by zipcode purchase units loanamount; run;

data matched2; merge notmatched_loans2(rename= (date=loanoriginationdate)) notmatched_props2; 
by zipcode purchase units loanamount;
if absnetloanfk ne .;
if sr_property_id ne .;
regist_lag=date-loanoriginationdate;
if regist_lag ge 0 and regist_lag le 60;
run;


proc sort data=matched2; by absnetloanfk regist_lag; run;

data temp2; set matched2;
by absnetloanfk;
id+1;
if first.absnetloanfk then id=1;
run;

proc means data=temp2 noprint; by absnetloanfk;
var id;
output out=ids(drop= _type_ _freq_) max= idmax;
run;

data matched2b(where= (idmax=1)) temp3(where= (idmax gt 1)); merge temp2 ids; by absnetloanfk; run;


data check; set matched2b; by absnetloanfk; 
if first.absnetloanfk;
run;

proc sort data=matched2b; by sr_property_id; run;

data check; set matched2b; by sr_property_id;
if first.sr_property_id;
run;

proc means data=matched2b noprint; by sr_property_id;
var id;
output out=ids(drop= _type_ _freq_) n= nobs;
run;

data matched2b(drop= nobs); merge matched2b ids; by sr_property_id;
if nobs=1;
run;

data check; set matched2b; by sr_property_id;
if first.sr_property_id;
run;


data matched2c; set temp3; by absnetloanfk;
if id=2 and SR_BUYER=lag(SR_BUYER) and SR_LNDR_LAST_NAME_1=lag(SR_LNDR_LAST_NAME_1) and SR_BUYER ne "" and SR_LNDR_LAST_NAME_1 ne "";
run;

data matched2; set matched2a matched2b matched2c; run;

* Removing Duplicates;

proc sort data=matched2; by sr_property_id; run;

proc means data=matched2 noprint;
by sr_property_id;
var sr_property_id;
output out=obs_id(drop= _type_ _freq_) n= obs ;
run;

data match.matched2(drop= obs); merge matched2 obs_id; by sr_property_id;
if obs=1;
run;

data check; set match.matched2(keep= sr_property_id); by sr_property_id; 
if first.sr_property_id;
run;

proc sort data=match.matched2; by absnetloanfk; run;

data check; set match.matched2(keep= absnetloanfk); by absnetloanfk; 
if first.absnetloanfk;
run;

* Deleting duplicate properties with previous round;

proc sort data= match.matched1; by sr_property_id; run;

proc sort data= match.matched2; by sr_property_id; run;

data match.matched2;; merge match.matched2(in=j) match.matched1(keep= sr_property_id in=i) ; by sr_property_id;
if j=1 and i=0 ;
run;


* 2.c. Round 3;

proc sort data=notmatched_loans1; by absnetloanfk; run; 

proc sort data=match.matched2; by absnetloanfk; run; 

data notmatched_loans3; merge notmatched_loans1(in=i) match.matched2(in=j keep= absnetloanfk); by absnetloanfk;
if i=1 and j=0;
run;

proc sort data=notmatched_props1; by sr_property_id; run;

proc sort data=match.matched2; by sr_property_id; run; 

data notmatched_props3; merge notmatched_props1(in=i) match.matched2(in=j keep= sr_property_id); by sr_property_id;
if i=1 and j=0;
run;


proc sort data=notmatched_loans3; by zipcode purchase units loanamount; run;

proc sort data=notmatched_props3; by zipcode purchase units loanamount; run;

data matched3; merge notmatched_loans3(rename= (date=loanoriginationdate)) notmatched_props3; 
by zipcode purchase units loanamount;
if absnetloanfk ne .;
if sr_property_id ne .;
regist_lag=date-loanoriginationdate;
if regist_lag ge 0 and regist_lag le 60;
run;


proc sort data=matched3; by absnetloanfk regist_lag; run;

data temp2; set matched3;
by absnetloanfk;
id+1;
if first.absnetloanfk then id=1;
run;

proc means data=temp2 noprint; by absnetloanfk;
var id;
output out=ids(drop= _type_ _freq_) max= idmax;
run;

data matched3a(where= (idmax=1)) temp3(where= (idmax gt 1)); merge temp2 ids; by absnetloanfk; run;


data check; set matched3a; by absnetloanfk; 
if first.absnetloanfk;
run;

proc sort data=matched3a; by sr_property_id; run;

data check; set matched3a; by sr_property_id;
if first.sr_property_id;
run;

proc means data=matched3a noprint; by sr_property_id;
var id;
output out=ids(drop= _type_ _freq_) n= nobs;
run;

data matched3a(drop= nobs); merge matched3a ids; by sr_property_id;
if nobs=1;
run;

data check; set matched3a; by sr_property_id;
if first.sr_property_id;
run;


data matched3b; set temp3; by absnetloanfk;
if id=2 and SR_BUYER=lag(SR_BUYER) and SR_LNDR_LAST_NAME_1=lag(SR_LNDR_LAST_NAME_1) and SR_BUYER ne "" and SR_LNDR_LAST_NAME_1 ne "";
run;

data check; set matched3b; by absnetloanfk; 
if first.absnetloanfk;
run;

proc sort data=matched3b; by sr_property_id; run;

data check; set matched3b; by sr_property_id;
if first.sr_property_id;
run;

proc means data=matched3b noprint; by sr_property_id;
var id;
output out=ids(drop= _type_ _freq_) n= nobs;
run;

data matched3b(drop= nobs); merge matched3b ids; by sr_property_id;
if nobs=1;
run;

data check; set matched3b; by sr_property_id;
if first.sr_property_id;
run;


data matched3; set matched3a matched3b; run;

* Removing Duplicates;

proc sort data=matched3; by sr_property_id; run;

proc means data=matched3 noprint;
by sr_property_id;
var sr_property_id;
output out=obs_id(drop= _type_ _freq_) n= obs ;
run;

data match.matched3(drop= obs); merge matched3 obs_id; by sr_property_id;
if obs=1;
run;

data check; set match.matched3(keep= sr_property_id); by sr_property_id; 
if first.sr_property_id;
run;

proc sort data=match.matched3; by absnetloanfk; run;

data check; set match.matched3(keep= absnetloanfk); by absnetloanfk; 
if first.absnetloanfk;
run;

* Deleting duplicate properties with previous rounds;

proc sort data= match.matched1; by sr_property_id; run;

proc sort data= match.matched2; by sr_property_id; run;

proc sort data= match.matched3; by sr_property_id; run;

data match.matched3; merge  match.matched3(in=k) match.matched2(in=j keep= sr_property_id) match.matched1(keep= sr_property_id in=i) ; by sr_property_id;
if k=1 and j=0 and i=0;
run;


* 2.d. Final round;

* Unmatched loans and properties;

proc sort data=notmatched_loans3; by absnetloanfk; run; 

proc sort data=match.matched3; by absnetloanfk; run; 

data notmatched_loans4; merge notmatched_loans3(in=i) match.matched3(in=j keep= absnetloanfk); by absnetloanfk;
if i=1 and j=0;
run;

proc sort data=notmatched_props3; by sr_property_id; run;

proc sort data=match.matched3; by sr_property_id; run; 

data notmatched_props4; merge notmatched_props3(in=i) match.matched3(in=j keep= sr_property_id); by sr_property_id;
if i=1 and j=0;
run;

* Matching in registration data no more 60 days from origination;

proc sort data=notmatched_loans4; by zipcode purchase units loanamount; run;

proc sort data=notmatched_props4; by zipcode purchase units loanamount; run;

data matched4; merge notmatched_loans4(rename= (date=loanoriginationdate)) notmatched_props4; 
by zipcode purchase units loanamount;
if absnetloanfk ne .;
if sr_property_id ne .;
regist_lag=date-loanoriginationdate;
if regist_lag ge 0 and regist_lag le 60;
run;

* Keeping unique matches;

proc sort data=matched4; by absnetloanfk regist_lag; run;

data temp1; set matched4; by absnetloanfk;
id+1;
if first.absnetloanfk then id=1;
run;

proc means data=temp1 noprint; by absnetloanfk;
var id;
output out=ids(drop= _type_ _freq_) max= idmax;
run;

data matched4a(where= (idmax=1)) temp2(where= (idmax ge 2)); merge temp1 ids; by absnetloanfk; run;


data check; set matched4a; by absnetloanfk;
if first.absnetloanfk;
run;

proc sort data=matched4a; by sr_property_id; run;

data check; set matched4a; by Sr_property_id;
if first.sr_property_id;
run;

proc means data=matched4a noprint;
by sr_property_id;
var sr_property_id;
output out=ids(drop= _type_ _freq_) n= obs ;
run;

data matched4a(drop= obs); merge matched4a ids; by sr_property_id;
if obs=1;
run;

data check; set matched4a(keep= sr_property_id); by sr_property_id; 
if first.sr_property_id;
run;

* Keeping first match if next match at least one week after the first;

data ids(where= (x=1) keep= absnetloanfk x); set temp2;
if id=2 and regist_lag ge lag(regist_lag)+7 then x=1;
run;

data matched4b(drop= x); merge temp2 ids; by absnetloanfk;
if id=1 and x=1;
run;


data check; set matched4b; by absnetloanfk;
if first.absnetloanfk;
run;

proc sort data=matched4b; by sr_property_id; run;

data check; set matched4b; by Sr_property_id;
if first.sr_property_id;
run;

proc means data=matched4b noprint;
by sr_property_id;
var sr_property_id;
output out=ids(drop= _type_ _freq_) n= obs ;
run;

data matched4b(drop= obs); merge matched4b ids; by sr_property_id;
if obs=1;
run;

data check; set matched4b(keep= sr_property_id); by sr_property_id; 
if first.sr_property_id;
run;


data matched4; set matched4a matched4b; run;

* Removing Duplicates;

proc sort data=matched4; by sr_property_id; run;

proc means data=matched4 noprint;
by sr_property_id;
var sr_property_id;
output out=obs_id(drop= _type_ _freq_) n= obs ;
run;

data match.matched4(drop= obs); merge matched4 obs_id; by sr_property_id;
if obs=1;
run;

data check; set match.matched4(keep= sr_property_id); by sr_property_id; 
if first.sr_property_id;
run;

proc sort data=match.matched4; by absnetloanfk; run;

data check; set match.matched4(keep= absnetloanfk); by absnetloanfk; 
if first.absnetloanfk;
run;

* Deleting duplicate properties with previous rounds;

proc sort data= match.matched1; by sr_property_id; run;

proc sort data= match.matched2; by sr_property_id; run;

proc sort data= match.matched3; by sr_property_id; run;

proc sort data= match.matched4; by sr_property_id; run;

data match.matched4; merge  match.matched4(in=h) match.matched3(in=k keep= sr_property_id)  match.matched2(in=j keep= sr_property_id) match.matched1(keep= sr_property_id in=i) ; by sr_property_id;
if h=1 and k=0 and j=0 and i=0;
run;


* 2.e. Matched loans;

data match.absnet_realtytrac_matched(drop= id idmax); set match.matched1 match.matched2 match.matched3 match.matched4; run;

* Checking if properties and loans uniquely identified;

proc sort data=match.absnet_realtytrac_matched; by sr_property_id; run;

data check; set match.absnet_realtytrac_matched; by sr_property_id;
if first.sr_property_id;
run;

proc sort data=match.absnet_realtytrac_matched; by absnetloanfk; run;

data check; set match.absnet_realtytrac_matched; by absnetloanfk;
if first.absnetloanfk;
run;

* Matched properties and loans;

data properties(keep= sr_property_id zipcode propertystatefk); set match.absnet_realtytrac_matched; run;

proc sort data=properties; by sr_property_id; run;

data match.properties; set properties; by sr_property_id; if first.sr_property_id; run;


data loans(keep= absnetloanfk); set match.absnet_realtytrac_matched; run;

proc sort data=loans; by absnetloanfk; run;

data match.loans; set loans; by absnetloanfk; if first.absnetloanfk; run;


* III. IDENTIFYING NEXT TRANSACTIONS ON PROPERTIES MATCHED TO LOANS THAT PAID OFF;

PROC IMPORT OUT= WORK.LOAN_ENDS 
            DATAFILE= "D:\Box\Box\Moussa\Research\Underwater Mortgages\stata_sas_codes\JFQA Codes and Data\output_data\final_sample_loans_enddate.dta" 
            DBMS=STATA REPLACE;
RUN;

data check; set loan_ends;
if repdate=.;
run;

proc sort data=loan_ends; by absnetloanfk; run;

proc sort data=match.absnet_realtytrac_matched; by absnetloanfk; run;

data temp1; merge match.absnet_realtytrac_matched(in=i) loan_ends(in=j);
by absnetloanfk; 
if i=1 and j=1;
if paidoff=1;
run;

proc sort data=temp1; by sr_property_id; run;

proc sort data=recorder_assessor_data; by sr_property_id; run;

data temp2; 
merge temp1(in=i) recorder_assessor_data(in=j keep= SR_UNIQUE_ID SR_PROPERTY_ID SR_DATE_TRANSFER SR_BUYER SR_SELLER SR_TRAN_TYPE SR_LNDR_LAST_NAME_1 zipcode loanamount year month day purchase
rename= (SR_UNIQUE_ID=SR_UNIQUE_ID_2 SR_DATE_TRANSFER=SR_DATE_TRANSFER_2 SR_BUYER=SR_BUYER_2 SR_SELLER=SR_SELLER_2 SR_TRAN_TYPE=SR_TRAN_TYPE_2 
SR_LNDR_LAST_NAME_1=SR_LNDR_LAST_NAME_2 zipcode=zipcode_2 loanamount=loanamount_2 year=year_2 month=month_2 day=day_2 purchase=purchase_2)); 
by sr_property_id;
if i=1 and j=1;
run;

proc sort data=temp2; by absnetloanfk sr_property_id; run;

data temp3; set temp2;
if absnetloanfk=lag(absnetloanfk) and sr_property_id=lag(sr_property_id) and SR_DATE_TRANSFER_2=lag(SR_DATE_TRANSFER_2) and loanamount_2=lag(loanamount_2) then delete;
date_2=(year_2-1960)*12+month_2;
enddate_diff=date_2-repdate;
*if abs(enddate_diff) le 2;
if enddate_diff ge -2;
run;

proc sort data=temp3; by absnetloanfk enddate_diff; run;

data temp4; set temp3; by absnetloanfk;
id+1;
if first.absnetloanfk then id=1;
run;

data temp5; set temp4; by absnetloanfk;
if first.absnetloanfk;
run;

proc sort data=match.absnet_realtytrac_matched; by absnetloanfk; run;

data match.absnet_rltytrac_matched_twice_RR; 
merge match.absnet_realtytrac_matched temp5(keep= absnetloanfk SR_UNIQUE_ID_2 SR_BUYER_2 SR_SELLER_2 SR_DATE_TRANSFER_2 SR_TRAN_TYPE_2 SR_LNDR_LAST_NAME_2 zipcode_2 loanamount_2 year_2 month_2 day_2
purchase_2 date_2 enddate_diff);
by absnetloanfk;
run;


* IV. GETTING TAKE OUT TRANSACTIONS FOR PROPERTIES FOR PAIDOFFS NOT MATCHED TWICE IN THE PAPER;

data temp3bis; set temp2;
if absnetloanfk=lag(absnetloanfk) and sr_property_id=lag(sr_property_id) and SR_DATE_TRANSFER_2=lag(SR_DATE_TRANSFER_2) and loanamount_2=lag(loanamount_2) then delete;
date_2=(year_2-1960)*12+month_2;
enddate_diff=date_2-repdate;
if abs(enddate_diff) le 2;
*if enddate_diff ge -2;
run;

proc sort data=temp3bis; by absnetloanfk descending loanamount_2; run;

data temp4bis; set temp3bis; by absnetloanfk;
id+1;
if first.absnetloanfk then id=1;
run;

data temp5bis; set temp4bis; by absnetloanfk;
if first.absnetloanfk;
run;

/* Paidoffs not Matched */

proc sort data=temp1; by absnetloanfk; run;

data match.notmatched_paidoffs notmatched_paidoffs(keep= absnetloanfk date originalloanbalance originalappraisedvalue zipcode SR_UNIQUE_ID SR_PROPERTY_ID MM_STATE_CODE repyear repmonth repdate paidoff); 
merge temp1(in=i) temp5bis(in=j keep= absnetloanfk); by absnetloanfk;
if i=1 and j=0;
run;


libname asshstry "D:\Box\Box\Moussa\Research\Underwater Mortgages\stata_sas_codes\JFQA Codes and Data\input_data"; /* This should point to the location of the RealtyTrac Assessor History Data */

libname hstryout "D:\Box\Box\Moussa\Research\Underwater Mortgages\stata_sas_codes\JFQA Codes and Data\output_data";

proc sort data=notmatched_paidoffs out=temp(rename= (SR_PROPERTY_ID=SA_PROPERTY_ID)); by SR_PROPERTY_ID; run;


%macro asshist(var);

proc sort data=asshstry.&var; by SA_PROPERTY_ID; run;

data hstryout.&var;
merge temp(in=i) asshstry.&var(in=j keep= SA_PROPERTY_ID SA_DATE_TRANSFER SA_VAL_TRANSFER);
by SA_PROPERTY_ID;
if i=1 and j=1;
if SA_DATE_TRANSFER ne .;
run;

%mend;

%asshist(assessorhistory_2008_0) 
/* Loop over the various cut of the RealtyTrac Assessor History Data
%asshist(assessorhistory_2008_1)
%asshist(assessorhistory_2008_2)
%asshist(assessorhistory_2008_3)
%asshist(assessorhistory_2008_4)
%asshist(assessorhistory_2008_5)
%asshist(assessorhistory_2008_6)
%asshist(assessorhistory_2008_7)
%asshist(assessorhistory_2008_8)
%asshist(assessorhistory_2008_9)*/

data hstryout.assessorhistory_combined;
set hstryout.assessorhistory_2008_0; /* append all data sets created from the previous loop */
run;

proc sort data=hstryout.assessorhistory_combined; by absnetloanfk SA_PROPERTY_ID SA_DATE_TRANSFER; run;


/* COMPUTE TIME BETWEEN LOAN TERMINATION DATE AND SUBSEQUENT TRANSACTIONS ON SAME PROPERTY */

data assessorhistory; set hstryout.assessorhistory_combined; by absnetloanfk SA_PROPERTY_ID SA_DATE_TRANSFER;
if first.SA_DATE_TRANSFER;
trans_date=put(SA_DATE_TRANSFER,8.);
trans_year=input(substr(trans_date,1,4),4.);
trans_month=input(substr(trans_date,5,2),2.);
date_2=(trans_year-1960)*12+trans_month;
enddate_diff=date_2-repdate;
orig_year=year(date);
orig_month=month(date);
origdate=(orig_year-1960)*12+orig_month;
origdate_diff=date_2-origdate;
run;

/* Remove Transactions Corresponding to Loan Origination Transactions or Prior */

data temp6; set assessorhistory;
if origdate_diff gt 0;
run;

proc sort data=temp6; by absnetloanfk SA_PROPERTY_ID SA_DATE_TRANSFER; run;

data temp7; set temp6; by absnetloanfk SA_PROPERTY_ID;
id+1;
if first.absnetloanfk then id=1;
run;

data check; set temp7; by absnetloanfk SA_PROPERTY_ID; if first.absnetloanfk; run;

data temp7bis; set temp6; by absnetloanfk;
if first.absnetloanfk;
run;


* ADDING ADDITIONAL SECOND MATCHED FROM ASSESSOR HISTORY DATA ;

libname input "D:\Box\Box\Moussa\Research\Underwater Mortgages\stata_sas_codes\JFQA Codes and Data\input_data";

data input.Absnet_rltytr_match_twice_rr_xd; 
merge match.Absnet_rltytrac_matched_twice_rr 
temp7bis(keep= ABSNetLoanFK SA_PROPERTY_ID trans_date trans_year trans_month date_2 enddate_diff origdate origdate_diff 
rename= (trans_date=trans_date_asshist trans_year=trans_year_asshist trans_month=trans_month_asshist date_2=date_2_asshist enddate_diff=enddate_diff_asshist
origdate=origdate_asshist origdate_diff=origdate_diff_asshist));
by ABSNetLoanFK;
run;

* END;

